#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Load packages
library(dplyr)
library(readxl)
library(stringr)
library(reshape2)
library(here)
library(tidycensus)

#FIPS
fips <- tidycensus::fips_codes
fips$fips <- as.numeric(paste0(fips$state_code, fips$county_code))
county_fips <- subset(fips, select = c(fips, county, state))
names(county_fips)[2] <- "name"
state_fips <- read.table(here("data", "input", "eiagenerator", "state_fips_master.csv"), sep=",", header=TRUE)
# Clean state_fips
state_fips <- state_fips[,c("state_name","state_abbr","fips")]

#1992
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "TYPE392.xls"))
plant <- read_excel(here("data", "input", "eiagenerator", "PLNT92.xls"))

# Year, FIPS Columns
dt_1992 <- county_fips
dt_1992 <- dt_1992[complete.cases(dt_1992),]
dt_1992$Year <- "1992"
dt_1992 <- dt_1992[,-3]
colnames(dt_1992) <- c("FIPS","County","Year")

# Create County FIP
plant <- merge(state_fips, plant, by.x = "state_abbr", by.y = "PLNTST")
plant$CTYCODE <- str_pad(plant$CTYCODE, width=3, side="left", pad="0")
plant$FIPS <- paste0(plant$fips, plant$CTYCODE)

# Remove Unnecessary Columns in plant
plant <- plant[, c("FIPS","state_abbr","state_name","UTILCODE","PLNTCODE", "PLNTNAME","CTYNAME","PLNTZIP")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(OPENSRC1 == "BFG" ~ "Gas",
                                             OPENSRC1 == "GAS" ~ "Gas",
                                             OPENSRC1 == "LNG" ~ "Gas",
                                             OPENSRC1 == "LPG" ~ "Gas",
                                             OPENSRC1 == "NG" ~ "Gas",
                                             OPENSRC1 == "RG" ~ "Gas",
                                             OPENSRC1 == "SNG" ~ "Gas",
                                             OPENSRC1 == "ANT" ~ "Coal",
                                             OPENSRC1 == "BIT" ~ "Coal",
                                             OPENSRC1 == "COL" ~ "Coal",
                                             OPENSRC1 == "CWM" ~ "Coal",
                                             OPENSRC1 == "LIG" ~ "Coal",
                                             OPENSRC1 == "SUB" ~ "Coal",
                                             OPENSRC1 == "SUN" ~ "Sun",
                                             OPENSRC1 == "WND" ~ "Wind"))
gen <- gen[,c("UTILCODE","PLNTCODE","INSERVMTH","INSERVYR","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]

# Defines dataset for identifying if there's any 
# plant in that county in that year, and 0 otherwise
dt_id <- dt_ref[,c("FIPS","TypeofFuel")]
dt_id <- distinct(dt_id)
dt_id$count <- 1
dt_id <- dcast(dt_id, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)

dt_1992 <- merge(dt_1992, dt_id, by='FIPS', all.x=TRUE)
dt_1992[is.na(dt_1992)] <- 0

# Defines dataset for identifying if there's a
# new plant in that county in that year, and 0 otherwise
dt_new <- dt_ref[,c("FIPS","TypeofFuel", "INSERVYR")]
dt_new <- subset(dt_new, INSERVYR %in% c("1992","1991","1990","1989"))
dt_new$count <- 1
dt_new <- dcast(dt_new, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
glimpse(dt_new)
colnames(dt_new) <- c("FIPS","NewCoal","NewGas","NewSun","NewWind")

dt_1992 <- merge(dt_1992, dt_new, by='FIPS', all.x=TRUE)
dt_1992[is.na(dt_1992)] <- 0

saveRDS(dt_1992, here("data", "inter", "eiagenerator", "1992.rds"))

#1996
#load data
gen <- read_excel(here("data", "input", "eiagenerator", "TYPE3Y96.xls"))
plant <- read_excel(here("data", "input", "eiagenerator", "PLANTY96.xls"))

# Year, FIPS Columns
dt_1996 <- county_fips
dt_1996 <- dt_1996[complete.cases(dt_1996),]
dt_1996$Year <- "1996"
dt_1996 <- dt_1996[,-3]
colnames(dt_1996) <- c("FIPS","County","Year")

# Create County FIP
plant <- merge(state_fips, plant, by.x = "state_abbr", by.y = "PLNTST")
plant$CTYCODE <- str_pad(plant$CTYCODE, width=3, side="left", pad="0")
plant$FIPS <- paste0(plant$fips, plant$CTYCODE)

# Remove Unnecessary Columns in plant
plant <- plant[, c("FIPS","state_abbr","state_name","UTILCODE","PLNTCODE", "PLNTNAME","CTYNAME","PLNTZIP")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(ENERGYSCE1 == "BFG" ~ "Gas",
                                             ENERGYSCE1 == "GAS" ~ "Gas",
                                             ENERGYSCE1 == "LNG" ~ "Gas",
                                             ENERGYSCE1 == "LPG" ~ "Gas",
                                             ENERGYSCE1 == "NG" ~ "Gas",
                                             ENERGYSCE1 == "RG" ~ "Gas",
                                             ENERGYSCE1 == "SNG" ~ "Gas",
                                             ENERGYSCE1 == "ANT" ~ "Coal",
                                             ENERGYSCE1 == "BIT" ~ "Coal",
                                             ENERGYSCE1 == "COL" ~ "Coal",
                                             ENERGYSCE1 == "CWM" ~ "Coal",
                                             ENERGYSCE1 == "LIG" ~ "Coal",
                                             ENERGYSCE1 == "SUB" ~ "Coal",
                                             ENERGYSCE1 == "SUN" ~ "Sun",
                                             ENERGYSCE1 == "WND" ~ "Wind"))
gen <- gen[,c("UTILCODE","PLNTCODE","INSERVMTH","INSERVYR","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]

# Defines dataset for identifying if there's any 
# plant in that county in that year, and 0 otherwise
dt_id <- dt_ref[,c("FIPS","TypeofFuel")]
dt_id <- distinct(dt_id)
dt_id$count <- 1
dt_id <- dcast(dt_id, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
dt_1996 <- merge(dt_1996, dt_id, by='FIPS', all.x=TRUE)
dt_1996[is.na(dt_1996)] <- 0

# Defines dataset for identifying if there's a
# new plant in that county in that year, and 0 otherwise
dt_new <- dt_ref[,c("FIPS","TypeofFuel", "INSERVYR")]
dt_new <- subset(dt_new, INSERVYR %in% c("1996", "1995", "1994", "1993"))
dt_new$count <- 1
dt_new <- dcast(dt_new, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
glimpse(dt_new)
colnames(dt_new) <- c("FIPS","NewCoal", "NewGas", "NewSun", "NewWind")

dt_1996 <- merge(dt_1996, dt_new, by='FIPS', all.x=TRUE)
dt_1996[is.na(dt_1996)] <- 0

saveRDS(dt_1996, here("data", "inter", "eiagenerator", "1996.rds"))
